{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 废弃"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('..')\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import importlib\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "# importlib.reload(utils.matplot)\n",
    "housing = pd.read_csv(\"/Users/tzp/data/housing/housing.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20640, 10)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing = pd.read_csv(\"/Users/tzp/data/housing/housing.csv\")\n",
    "housing.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split \n",
    "train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 增加几种新加工特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing[\"income_cat\"] = pd.cut(housing[\"median_income\"],\n",
    "    bins=[0., 1.5, 3.0, 4.5, 6., np.inf],\n",
    "    labels=[1, 2, 3, 4, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjAAAAGdCAYAAAAMm0nCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAz+0lEQVR4nO3df3RTdZ7/8Vdb2pQCKYLS0qFgZ1iBCsgvpfEnYGkHq0dHdlYdBruKurLFtXQXlF0GC7gDMiLiWEUXpM6OHIXZgVVA2gBSBim/Kh1LdVh/MNYdaLpHhQhIGtr7/WNO8iX8bEquzac8H+fkQO795NPP+75v7MubhMRYlmUJAADAILFtvQAAAIBwEWAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMbp0NYLsEtzc7MOHjyoLl26KCYmpq2XAwAAWsCyLH377bdKS0tTbOy5r7O02wBz8OBBpaent/UyAABAK3z55Zfq1avXOfe32wDTpUsXSX89AE6nM2Lz+v1+lZeXKycnR/Hx8RGbN5q09xqpz3ztvcb2Xp/U/mukvtbzer1KT08P/h4/l3YbYAIvGzmdzogHmKSkJDmdznZ5Ukrtv0bqM197r7G91ye1/xqp7+Jd6O0fvIkXAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgd2noBANBaA4vL5GuKaetltNif5+e19RKAdoMrMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA44QVYK688krFxMSccSsoKJAknThxQgUFBerevbs6d+6s8ePHy+PxhMxRV1envLw8JSUlqUePHpo2bZpOnjwZMmbLli0aNmyYHA6H+vbtq9LS0ourEgAAtCthBZjdu3fr0KFDwZvb7ZYk/fSnP5UkTZ06Ve+8845WrVqliooKHTx4UHfffXfw8U1NTcrLy1NjY6O2b9+u119/XaWlpZo1a1ZwzIEDB5SXl6fRo0erurpahYWFeuihh1RWVhaJegEAQDsQ1r/Ee8UVV4Tcnz9/vn70ox/plltu0ZEjR7Rs2TKtWLFCY8aMkSQtX75cAwYM0I4dO5SVlaXy8nJ99NFH2rhxo1JSUjRkyBDNnTtXTzzxhIqLi5WQkKAlS5YoIyNDCxculCQNGDBA27Zt06JFi5SbmxuhsgEAgMla/VUCjY2N+u1vf6uioiLFxMSoqqpKfr9f2dnZwTH9+/dX7969VVlZqaysLFVWVmrQoEFKSUkJjsnNzdXkyZNVW1uroUOHqrKyMmSOwJjCwsLzrsfn88nn8wXve71eSZLf75ff729tmWcIzBXJOaNNe6+R+swXqM0Ra7XxSsLT0p5cSj1srzVS38XPfSGtDjBr1qzR4cOH9fd///eSpPr6eiUkJKhr164h41JSUlRfXx8cc2p4CewP7DvfGK/Xq++++04dO3Y863rmzZun2bNnn7G9vLxcSUlJYdd3IYGXz9qz9l4j9Zlv7ojmtl5CWNavXx/W+Euhh+29RuoL3/Hjx1s0rtUBZtmyZRo3bpzS0tJaO0VEzZgxQ0VFRcH7Xq9X6enpysnJkdPpjNjP8fv9crvdGjt2rOLj4yM2bzRp7zVSn/kCNf5iT6x8zeZ8meO+4pa9DH4p9bC91kh9rRd4BeVCWhVgvvjiC23cuFG///3vg9tSU1PV2Niow4cPh1yF8Xg8Sk1NDY7ZtWtXyFyBTymdOub0Ty55PB45nc5zXn2RJIfDIYfDccb2+Ph4W04eu+aNJu29Ruozn685xqhvow63H5dCD9t7jdTXujlbolX/Dszy5cvVo0cP5eX9/6+GHz58uOLj47Vp06bgtv3796uurk4ul0uS5HK5VFNTo4aGhuAYt9stp9OpzMzM4JhT5wiMCcwBAAAQdoBpbm7W8uXLlZ+frw4d/v8FnOTkZE2aNElFRUV67733VFVVpQceeEAul0tZWVmSpJycHGVmZmrixIn64x//qLKyMs2cOVMFBQXBqyePPvqoPv/8c02fPl1/+tOf9NJLL2nlypWaOnVqhEoGAACmC/slpI0bN6qurk4PPvjgGfsWLVqk2NhYjR8/Xj6fT7m5uXrppZeC++Pi4rR27VpNnjxZLpdLnTp1Un5+vubMmRMck5GRoXXr1mnq1KlavHixevXqpaVLl/IRagAAEBR2gMnJyZFlnf2ji4mJiSopKVFJSck5H9+nT58LvhN/1KhR2rt3b7hLAwAAlwi+CwkAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAccL+Nmrg+3Tlk+siPqcjztKC66SBxWXyNcVEfP4/z8+L+JwAgFBcgQEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABgn7ADzl7/8RT//+c/VvXt3dezYUYMGDdKePXuC+y3L0qxZs9SzZ0917NhR2dnZ+uSTT0Lm+PrrrzVhwgQ5nU517dpVkyZN0tGjR0PGfPjhh7rpppuUmJio9PR0LViwoJUlAgCA9iasAPPNN9/ohhtuUHx8vN5991199NFHWrhwoS677LLgmAULFuiFF17QkiVLtHPnTnXq1Em5ubk6ceJEcMyECRNUW1srt9uttWvXauvWrXrkkUeC+71er3JyctSnTx9VVVXpV7/6lYqLi/Xqq69GoGQAAGC6DuEMfuaZZ5Senq7ly5cHt2VkZAT/blmWnn/+ec2cOVN33nmnJOk3v/mNUlJStGbNGt177736+OOPtWHDBu3evVsjRoyQJP3617/WbbfdpmeffVZpaWl644031NjYqNdee00JCQm6+uqrVV1dreeeey4k6AAAgEtTWAHm7bffVm5urn7605+qoqJCP/jBD/SP//iPevjhhyVJBw4cUH19vbKzs4OPSU5O1siRI1VZWal7771XlZWV6tq1azC8SFJ2drZiY2O1c+dO/eQnP1FlZaVuvvlmJSQkBMfk5ubqmWee0TfffBNyxSfA5/PJ5/MF73u9XkmS3++X3+8Pp8zzCswVyTmjTTTV6IizIj9nrBXyZ6S19XGLpv7ZJVCbXT20S0t7cin1sL3WSH0XP/eFhBVgPv/8c7388ssqKirSv/7rv2r37t36p3/6JyUkJCg/P1/19fWSpJSUlJDHpaSkBPfV19erR48eoYvo0EHdunULGXPqlZ1T56yvrz9rgJk3b55mz559xvby8nIlJSWFU2aLuN3uiM8ZbaKhxgXX2Tf33BHNtsy7fv16W+YNVzT0z2529dAu4Z4bl0IP23uN1Be+48ePt2hcWAGmublZI0aM0C9/+UtJ0tChQ7Vv3z4tWbJE+fn54a8ygmbMmKGioqLgfa/Xq/T0dOXk5MjpdEbs5/j9frndbo0dO1bx8fERmzeaRFONA4vLIj6nI9bS3BHN+sWeWPmaYyI+/77i3IjPGY5o6p9dAjXa1UO7tPTcuJR62F5rpL7WC7yCciFhBZiePXsqMzMzZNuAAQP0X//1X5Kk1NRUSZLH41HPnj2DYzwej4YMGRIc09DQEDLHyZMn9fXXXwcfn5qaKo/HEzImcD8w5nQOh0MOh+OM7fHx8bacPHbNG02ioUZfk32/nHzNMbbM39bHLCAa+mc3u3pol3D7cSn0sL3XSH2tm7MlwvoU0g033KD9+/eHbPuf//kf9enTR9Jf39CbmpqqTZs2Bfd7vV7t3LlTLpdLkuRyuXT48GFVVVUFx2zevFnNzc0aOXJkcMzWrVtDXgdzu93q16/fWV8+AgAAl5awAszUqVO1Y8cO/fKXv9Snn36qFStW6NVXX1VBQYEkKSYmRoWFhXr66af19ttvq6amRvfff7/S0tJ01113SfrrFZsf//jHevjhh7Vr1y69//77mjJliu69916lpaVJkn72s58pISFBkyZNUm1trd566y0tXrw45CUiAABw6QrrJaRrr71Wq1ev1owZMzRnzhxlZGTo+eef14QJE4Jjpk+frmPHjumRRx7R4cOHdeONN2rDhg1KTEwMjnnjjTc0ZcoU3XrrrYqNjdX48eP1wgsvBPcnJyervLxcBQUFGj58uC6//HLNmjWLj1ADAABJYQYYSbr99tt1++23n3N/TEyM5syZozlz5pxzTLdu3bRixYrz/pzBgwfrD3/4Q7jLAwAAlwC+CwkAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOGEFmOLiYsXExITc+vfvH9x/4sQJFRQUqHv37urcubPGjx8vj8cTMkddXZ3y8vKUlJSkHj16aNq0aTp58mTImC1btmjYsGFyOBzq27evSktLW18hAABod8K+AnP11Vfr0KFDwdu2bduC+6ZOnap33nlHq1atUkVFhQ4ePKi77747uL+pqUl5eXlqbGzU9u3b9frrr6u0tFSzZs0Kjjlw4IDy8vI0evRoVVdXq7CwUA899JDKysouslQAANBedAj7AR06KDU19YztR44c0bJly7RixQqNGTNGkrR8+XINGDBAO3bsUFZWlsrLy/XRRx9p48aNSklJ0ZAhQzR37lw98cQTKi4uVkJCgpYsWaKMjAwtXLhQkjRgwABt27ZNixYtUm5u7kWWCwAA2oOwA8wnn3yitLQ0JSYmyuVyad68eerdu7eqqqrk9/uVnZ0dHNu/f3/17t1blZWVysrKUmVlpQYNGqSUlJTgmNzcXE2ePFm1tbUaOnSoKisrQ+YIjCksLDzvunw+n3w+X/C+1+uVJPn9fvn9/nDLPKfAXJGcM9pEU42OOCvyc8ZaIX9GWlsft2jqn10CtdnVQ7u0tCeXUg/ba43Ud/FzX0hYAWbkyJEqLS1Vv379dOjQIc2ePVs33XST9u3bp/r6eiUkJKhr164hj0lJSVF9fb0kqb6+PiS8BPYH9p1vjNfr1XfffaeOHTuedW3z5s3T7Nmzz9heXl6upKSkcMpsEbfbHfE5o0001LjgOvvmnjui2ZZ5169fb8u84YqG/tnNrh7aJdxz41LoYXuvkfrCd/z48RaNCyvAjBs3Lvj3wYMHa+TIkerTp49Wrlx5zmDxfZkxY4aKioqC971er9LT05WTkyOn0xmxn+P3++V2uzV27FjFx8dHbN5oEk01DiyO/HufHLGW5o5o1i/2xMrXHBPx+fcVt+1LndHUP7sEarSrh3Zp6blxKfWwvdZIfa0XeAXlQsJ+CelUXbt21VVXXaVPP/1UY8eOVWNjow4fPhxyFcbj8QTfM5Oamqpdu3aFzBH4lNKpY07/5JLH45HT6TxvSHI4HHI4HGdsj4+Pt+XksWveaBINNfqa7Pvl5GuOsWX+tj5mAdHQP7vZ1UO7hNuPS6GH7b1G6mvdnC1xUf8OzNGjR/XZZ5+pZ8+eGj58uOLj47Vp06bg/v3796uurk4ul0uS5HK5VFNTo4aGhuAYt9stp9OpzMzM4JhT5wiMCcwBAAAQVoD5l3/5F1VUVOjPf/6ztm/frp/85CeKi4vTfffdp+TkZE2aNElFRUV67733VFVVpQceeEAul0tZWVmSpJycHGVmZmrixIn64x//qLKyMs2cOVMFBQXBqyePPvqoPv/8c02fPl1/+tOf9NJLL2nlypWaOnVq5KsHAABGCuslpP/93//Vfffdp6+++kpXXHGFbrzxRu3YsUNXXHGFJGnRokWKjY3V+PHj5fP5lJubq5deein4+Li4OK1du1aTJ0+Wy+VSp06dlJ+frzlz5gTHZGRkaN26dZo6daoWL16sXr16aenSpXyEGgAABIUVYN58883z7k9MTFRJSYlKSkrOOaZPnz4XfCf+qFGjtHfv3nCWBgAALiF8FxIAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcS4qwMyfP18xMTEqLCwMbjtx4oQKCgrUvXt3de7cWePHj5fH4wl5XF1dnfLy8pSUlKQePXpo2rRpOnnyZMiYLVu2aNiwYXI4HOrbt69KS0svZqkAAKAdaXWA2b17t1555RUNHjw4ZPvUqVP1zjvvaNWqVaqoqNDBgwd19913B/c3NTUpLy9PjY2N2r59u15//XWVlpZq1qxZwTEHDhxQXl6eRo8ererqahUWFuqhhx5SWVlZa5cLAADakVYFmKNHj2rChAn6j//4D1122WXB7UeOHNGyZcv03HPPacyYMRo+fLiWL1+u7du3a8eOHZKk8vJyffTRR/rtb3+rIUOGaNy4cZo7d65KSkrU2NgoSVqyZIkyMjK0cOFCDRgwQFOmTNHf/u3fatGiRREoGQAAmK5Dax5UUFCgvLw8ZWdn6+mnnw5ur6qqkt/vV3Z2dnBb//791bt3b1VWViorK0uVlZUaNGiQUlJSgmNyc3M1efJk1dbWaujQoaqsrAyZIzDm1JeqTufz+eTz+YL3vV6vJMnv98vv97emzLMKzBXJOaNNNNXoiLMiP2esFfJnpLX1cYum/tklUJtdPbRLS3tyKfWwvdZIfRc/94WEHWDefPNNffDBB9q9e/cZ++rr65WQkKCuXbuGbE9JSVF9fX1wzKnhJbA/sO98Y7xer7777jt17NjxjJ89b948zZ49+4zt5eXlSkpKanmBLeR2uyM+Z7SJhhoXXGff3HNHNNsy7/r1622ZN1zR0D+72dVDu4R7blwKPWzvNVJf+I4fP96icWEFmC+//FKPP/643G63EhMTW7Uwu8yYMUNFRUXB+16vV+np6crJyZHT6YzYz/H7/XK73Ro7dqzi4+MjNm80iaYaBxZH/n1PjlhLc0c06xd7YuVrjon4/PuKcyM+ZziiqX92CdRoVw/t0tJz41LqYXutkfpaL/AKyoWEFWCqqqrU0NCgYcOGBbc1NTVp69atevHFF1VWVqbGxkYdPnw45CqMx+NRamqqJCk1NVW7du0KmTfwKaVTx5z+ySWPxyOn03nWqy+S5HA45HA4ztgeHx9vy8lj17zRJBpq9DXZ98vJ1xxjy/xtfcwCoqF/drOrh3YJtx+XQg/be43U17o5WyKsN/HeeuutqqmpUXV1dfA2YsQITZgwIfj3+Ph4bdq0KfiY/fv3q66uTi6XS5LkcrlUU1OjhoaG4Bi32y2n06nMzMzgmFPnCIwJzAEAAC5tYV2B6dKliwYOHBiyrVOnTurevXtw+6RJk1RUVKRu3brJ6XTqsccek8vlUlZWliQpJydHmZmZmjhxohYsWKD6+nrNnDlTBQUFwSsojz76qF588UVNnz5dDz74oDZv3qyVK1dq3bp1kagZAAAYrlWfQjqfRYsWKTY2VuPHj5fP51Nubq5eeuml4P64uDitXbtWkydPlsvlUqdOnZSfn685c+YEx2RkZGjdunWaOnWqFi9erF69emnp0qXKzW3b9xYAAIDocNEBZsuWLSH3ExMTVVJSopKSknM+pk+fPhd8N/6oUaO0d+/ei10eAABoh/guJAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwTsS/CwkA0H5c+aQ9X6LriLO04DppYHGZfE0xEZ37z/PzIjofohNXYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDhhBZiXX35ZgwcPltPplNPplMvl0rvvvhvcf+LECRUUFKh79+7q3Lmzxo8fL4/HEzJHXV2d8vLylJSUpB49emjatGk6efJkyJgtW7Zo2LBhcjgc6tu3r0pLS1tfIQAAaHfCCjC9evXS/PnzVVVVpT179mjMmDG68847VVtbK0maOnWq3nnnHa1atUoVFRU6ePCg7r777uDjm5qalJeXp8bGRm3fvl2vv/66SktLNWvWrOCYAwcOKC8vT6NHj1Z1dbUKCwv10EMPqaysLEIlAwAA03UIZ/Add9wRcv/f//3f9fLLL2vHjh3q1auXli1bphUrVmjMmDGSpOXLl2vAgAHasWOHsrKyVF5ero8++kgbN25USkqKhgwZorlz5+qJJ55QcXGxEhIStGTJEmVkZGjhwoWSpAEDBmjbtm1atGiRcnNzI1Q2AAAwWVgB5lRNTU1atWqVjh07JpfLpaqqKvn9fmVnZwfH9O/fX71791ZlZaWysrJUWVmpQYMGKSUlJTgmNzdXkydPVm1trYYOHarKysqQOQJjCgsLz7sen88nn88XvO/1eiVJfr9ffr+/tWWeITBXJOeMNtFUoyPOivycsVbIn5HW1sctmvpnl0BtdvXQLi3tSTT10I7noGTv8zAajls09dAOdtbX0jnDDjA1NTVyuVw6ceKEOnfurNWrVyszM1PV1dVKSEhQ165dQ8anpKSovr5eklRfXx8SXgL7A/vON8br9eq7775Tx44dz7quefPmafbs2WdsLy8vV1JSUrhlXpDb7Y74nNEmGmpccJ19c88d0WzLvOvXr7dl3nBFQ//sZlcP7RLuuRENPbTzOSjZ08NoeQ5K0dFDO9lR3/Hjx1s0LuwA069fP1VXV+vIkSP63e9+p/z8fFVUVIS9wEibMWOGioqKgve9Xq/S09OVk5Mjp9MZsZ/j9/vldrs1duxYxcfHR2zeaBJNNQ4sjvx7nxyxluaOaNYv9sTK1xwT8fn3FbftS53R1D+7BGq0q4d2aem5EU09tOM5KNn7PGzr56AUXT20g531BV5BuZCwA0xCQoL69u0rSRo+fLh2796txYsX65577lFjY6MOHz4cchXG4/EoNTVVkpSamqpdu3aFzBf4lNKpY07/5JLH45HT6Tzn1RdJcjgccjgcZ2yPj4+35eSxa95oEg01+prs++Xka46xZf62PmYB0dA/u9nVQ7uE249o6KHdx9eOHrb1MTtVNPTQTnbU19L5LvrfgWlubpbP59Pw4cMVHx+vTZs2Bfft379fdXV1crlckiSXy6Wamho1NDQEx7jdbjmdTmVmZgbHnDpHYExgDgAAgLCuwMyYMUPjxo1T79699e2332rFihXasmWLysrKlJycrEmTJqmoqEjdunWT0+nUY489JpfLpaysLElSTk6OMjMzNXHiRC1YsED19fWaOXOmCgoKgldPHn30Ub344ouaPn26HnzwQW3evFkrV67UunXrIl89AAAwUlgBpqGhQffff78OHTqk5ORkDR48WGVlZRo7dqwkadGiRYqNjdX48ePl8/mUm5url156Kfj4uLg4rV27VpMnT5bL5VKnTp2Un5+vOXPmBMdkZGRo3bp1mjp1qhYvXqxevXpp6dKlfIQaAAAEhRVgli1bdt79iYmJKikpUUlJyTnH9OnT54LvEB81apT27t0bztIAAMAlhO9CAgAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjNOhrRcAAMCl7son17X1EsLiiLO04Lq2XQNXYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYJ6wAM2/ePF177bXq0qWLevToobvuukv79+8PGXPixAkVFBSoe/fu6ty5s8aPHy+PxxMypq6uTnl5eUpKSlKPHj00bdo0nTx5MmTMli1bNGzYMDkcDvXt21elpaWtqxAAALQ7YQWYiooKFRQUaMeOHXK73fL7/crJydGxY8eCY6ZOnap33nlHq1atUkVFhQ4ePKi77747uL+pqUl5eXlqbGzU9u3b9frrr6u0tFSzZs0Kjjlw4IDy8vI0evRoVVdXq7CwUA899JDKysoiUDIAADBdh3AGb9iwIeR+aWmpevTooaqqKt188806cuSIli1bphUrVmjMmDGSpOXLl2vAgAHasWOHsrKyVF5ero8++kgbN25USkqKhgwZorlz5+qJJ55QcXGxEhIStGTJEmVkZGjhwoWSpAEDBmjbtm1atGiRcnNzI1Q6AAAwVVgB5nRHjhyRJHXr1k2SVFVVJb/fr+zs7OCY/v37q3fv3qqsrFRWVpYqKys1aNAgpaSkBMfk5uZq8uTJqq2t1dChQ1VZWRkyR2BMYWHhOdfi8/nk8/mC971eryTJ7/fL7/dfTJkhAnNFcs5oE001OuKsyM8Za4X8GWltfdyiqX92CdRmVw/t0tKeRFMP7XgOSvY+D6PhuIXbQ7uOs10CfbPjWLd0zlYHmObmZhUWFuqGG27QwIEDJUn19fVKSEhQ165dQ8ampKSovr4+OObU8BLYH9h3vjFer1ffffedOnbseMZ65s2bp9mzZ5+xvby8XElJSa0r8jzcbnfE54w20VDjguvsm3vuiGZb5l2/fr0t84YrGvpnN7t6aJdwz41o6KGdz0HJnh5Gy3NQankP7T7OdrHjHD1+/HiLxrU6wBQUFGjfvn3atm1ba6eIqBkzZqioqCh43+v1Kj09XTk5OXI6nRH7OX6/X263W2PHjlV8fHzE5o0m0VTjwOLIv+/JEWtp7ohm/WJPrHzNMRGff19x277MGU39s0ugRrt6aJeWnhvR1EM7noOSvc/Dtn4OSuH30K7jbJdA/+w4RwOvoFxIqwLMlClTtHbtWm3dulW9evUKbk9NTVVjY6MOHz4cchXG4/EoNTU1OGbXrl0h8wU+pXTqmNM/ueTxeOR0Os969UWSHA6HHA7HGdvj4+Nt+Q+AXfNGk2io0ddk3y8nX3OMLfO39TELiIb+2c2uHtol3H5EQw/tPr529LCtj9mpWtpDk87jU9lxjrZ0vrA+hWRZlqZMmaLVq1dr8+bNysjICNk/fPhwxcfHa9OmTcFt+/fvV11dnVwulyTJ5XKppqZGDQ0NwTFut1tOp1OZmZnBMafOERgTmAMAAFzawroCU1BQoBUrVui///u/1aVLl+B7VpKTk9WxY0clJydr0qRJKioqUrdu3eR0OvXYY4/J5XIpKytLkpSTk6PMzExNnDhRCxYsUH19vWbOnKmCgoLgFZRHH31UL774oqZPn64HH3xQmzdv1sqVK7Vu3boIlw8AAEwU1hWYl19+WUeOHNGoUaPUs2fP4O2tt94Kjlm0aJFuv/12jR8/XjfffLNSU1P1+9//Prg/Li5Oa9euVVxcnFwul37+85/r/vvv15w5c4JjMjIytG7dOrndbl1zzTVauHChli5dykeoAQCApDCvwFjWhT/mlZiYqJKSEpWUlJxzTJ8+fS74LvFRo0Zp79694SwPAABcIvguJAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGCTvAbN26VXfccYfS0tIUExOjNWvWhOy3LEuzZs1Sz5491bFjR2VnZ+uTTz4JGfP1119rwoQJcjqd6tq1qyZNmqSjR4+GjPnwww910003KTExUenp6VqwYEH41QEAgHYp7ABz7NgxXXPNNSopKTnr/gULFuiFF17QkiVLtHPnTnXq1Em5ubk6ceJEcMyECRNUW1srt9uttWvXauvWrXrkkUeC+71er3JyctSnTx9VVVXpV7/6lYqLi/Xqq6+2okQAANDedAj3AePGjdO4cePOus+yLD3//POaOXOm7rzzTknSb37zG6WkpGjNmjW699579fHHH2vDhg3avXu3RowYIUn69a9/rdtuu03PPvus0tLS9MYbb6ixsVGvvfaaEhISdPXVV6u6ulrPPfdcSNABAACXpoi+B+bAgQOqr69XdnZ2cFtycrJGjhypyspKSVJlZaW6du0aDC+SlJ2drdjYWO3cuTM45uabb1ZCQkJwTG5urvbv369vvvkmkksGAAAGCvsKzPnU19dLklJSUkK2p6SkBPfV19erR48eoYvo0EHdunULGZORkXHGHIF9l1122Rk/2+fzyefzBe97vV5Jkt/vl9/vv5iyQgTmiuSc0SaaanTEWZGfM9YK+TPS2vq4RVP/7BKoza4e2qWlPYmmHtrxHJTsfR5Gw3ELt4d2HWe7BPpmx7Fu6ZwRDTBtad68eZo9e/YZ28vLy5WUlBTxn+d2uyM+Z7SJhhoXXGff3HNHNNsy7/r1622ZN1zR0D+72dVDu4R7bkRDD+18Dkr29DBanoNSy3to93G2ix3n6PHjx1s0LqIBJjU1VZLk8XjUs2fP4HaPx6MhQ4YExzQ0NIQ87uTJk/r666+Dj09NTZXH4wkZE7gfGHO6GTNmqKioKHjf6/UqPT1dOTk5cjqdF1fYKfx+v9xut8aOHav4+PiIzRtNoqnGgcVlEZ/TEWtp7ohm/WJPrHzNMRGff19xbsTnDEc09c8ugRrt6qFdWnpuRFMP7XgOSvY+D9v6OSiF30O7jrNdAv2z4xwNvIJyIRENMBkZGUpNTdWmTZuCgcXr9Wrnzp2aPHmyJMnlcunw4cOqqqrS8OHDJUmbN29Wc3OzRo4cGRzzb//2b/L7/cED43a71a9fv7O+fCRJDodDDofjjO3x8fG2/AfArnmjSTTU6Guy75eTrznGlvnb+pgFREP/7GZXD+0Sbj+ioYd2H187etjWx+xULe2hSefxqew4R1s6X9hv4j169Kiqq6tVXV0t6a9v3K2urlZdXZ1iYmJUWFiop59+Wm+//bZqamp0//33Ky0tTXfddZckacCAAfrxj3+shx9+WLt27dL777+vKVOm6N5771VaWpok6Wc/+5kSEhI0adIk1dbW6q233tLixYtDrrAAAIBLV9hXYPbs2aPRo0cH7wdCRX5+vkpLSzV9+nQdO3ZMjzzyiA4fPqwbb7xRGzZsUGJiYvAxb7zxhqZMmaJbb71VsbGxGj9+vF544YXg/uTkZJWXl6ugoEDDhw/X5ZdfrlmzZvERagAAIKkVAWbUqFGyrHO/WzomJkZz5szRnDlzzjmmW7duWrFixXl/zuDBg/WHP/wh3OUBAIBLAN+FBAAAjEOAAQAAxmk3/w7M921gcZlR7xr/8/y8tl4CAAARwxUYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4xBgAACAcQgwAADAOAQYAABgHAIMAAAwDgEGAAAYhwADAACMQ4ABAADGIcAAAADjEGAAAIBxCDAAAMA4BBgAAGAcAgwAADAOAQYAABgnqgNMSUmJrrzySiUmJmrkyJHatWtXWy8JAABEgagNMG+99ZaKior01FNP6YMPPtA111yj3NxcNTQ0tPXSAABAG4vaAPPcc8/p4Ycf1gMPPKDMzEwtWbJESUlJeu2119p6aQAAoI11aOsFnE1jY6Oqqqo0Y8aM4LbY2FhlZ2ersrLyrI/x+Xzy+XzB+0eOHJEkff311/L7/RFbm9/v1/Hjx9XBH6um5piIzWu3r776qsVjAzV+9dVXio+Pt3FVF9bh5LHIz9ls6fjxZtt6GM6xtkM09c8u7f15GE09tOM5KNn7PGzr56AUfg/tOs52CfTPjnP022+/lSRZlnX+gVYU+stf/mJJsrZv3x6yfdq0adZ111131sc89dRTliRu3Lhx48aNWzu4ffnll+fNClF5BaY1ZsyYoaKiouD95uZmff311+revbtiYiKX7r1er9LT0/Xll1/K6XRGbN5o0t5rpD7ztfca23t9Uvuvkfpaz7Isffvtt0pLSzvvuKgMMJdffrni4uLk8XhCtns8HqWmpp71MQ6HQw6HI2Rb165d7VqinE5nuzwpT9Xea6Q+87X3Gtt7fVL7r5H6Wic5OfmCY6LyTbwJCQkaPny4Nm3aFNzW3NysTZs2yeVyteHKAABANIjKKzCSVFRUpPz8fI0YMULXXXednn/+eR07dkwPPPBAWy8NAAC0sagNMPfcc4/+7//+T7NmzVJ9fb2GDBmiDRs2KCUlpU3X5XA49NRTT53xclV70t5rpD7ztfca23t9UvuvkfrsF2NZF/qcEgAAQHSJyvfAAAAAnA8BBgAAGIcAAwAAjEOAAQAAxiHAnGbr1q264447lJaWppiYGK1Zs+aCj9myZYuGDRsmh8Ohvn37qrS01PZ1tla49W3ZskUxMTFn3Orr67+fBYdp3rx5uvbaa9WlSxf16NFDd911l/bv33/Bx61atUr9+/dXYmKiBg0apPXr138Pq22d1tRYWlp6Rg8TExO/pxWH5+WXX9bgwYOD/0CWy+XSu+++e97HmNS/cOszqXdnM3/+fMXExKiwsPC840zq4elaUqNJfSwuLj5jrf379z/vY9qifwSY0xw7dkzXXHONSkpKWjT+wIEDysvL0+jRo1VdXa3CwkI99NBDKisrs3mlrRNufQH79+/XoUOHgrcePXrYtMKLU1FRoYKCAu3YsUNut1t+v185OTk6duzcX5S2fft23XfffZo0aZL27t2ru+66S3fddZf27dv3Pa685VpTo/TXfzHz1B5+8cUX39OKw9OrVy/Nnz9fVVVV2rNnj8aMGaM777xTtbW1Zx1vWv/CrU8yp3en2717t1555RUNHjz4vONM6+GpWlqjZFYfr7766pC1btu27Zxj26x/kfn6xfZJkrV69erzjpk+fbp19dVXh2y75557rNzcXBtXFhktqe+9996zJFnffPPN97KmSGtoaLAkWRUVFecc83d/93dWXl5eyLaRI0da//AP/2D38iKiJTUuX77cSk5O/v4WFWGXXXaZtXTp0rPuM71/lnX++kzt3bfffmv9zd/8jeV2u61bbrnFevzxx8851tQehlOjSX186qmnrGuuuabF49uqf1yBuUiVlZXKzs4O2Zabm6vKyso2WpE9hgwZop49e2rs2LF6//3323o5LXbkyBFJUrdu3c45xvQetqRGSTp69Kj69Omj9PT0C/4ff7RoamrSm2++qWPHjp3za0RM7l9L6pPM7F1BQYHy8vLO6M3ZmNrDcGqUzOrjJ598orS0NP3whz/UhAkTVFdXd86xbdW/qP2XeE1RX19/xr8OnJKSIq/Xq++++04dO3Zso5VFRs+ePbVkyRKNGDFCPp9PS5cu1ahRo7Rz504NGzasrZd3Xs3NzSosLNQNN9yggQMHnnPcuXoYre/zOVVLa+zXr59ee+01DR48WEeOHNGzzz6r66+/XrW1terVq9f3uOKWqampkcvl0okTJ9S5c2etXr1amZmZZx1rYv/Cqc+03knSm2++qQ8++EC7d+9u0XgTexhujSb1ceTIkSotLVW/fv106NAhzZ49WzfddJP27dunLl26nDG+rfpHgMF59evXT/369Qvev/766/XZZ59p0aJF+s///M82XNmFFRQUaN++fed97dZ0La3R5XKF/B/+9ddfrwEDBuiVV17R3Llz7V5m2Pr166fq6modOXJEv/vd75Sfn6+Kiopz/pI3TTj1mda7L7/8Uo8//rjcbnfUvkn1YrWmRpP6OG7cuODfBw8erJEjR6pPnz5auXKlJk2a1IYrC0WAuUipqanyeDwh2zwej5xOp/FXX87luuuui/pQMGXKFK1du1Zbt2694P/dnKuHqampdi7xooVT4+ni4+M1dOhQffrppzat7uIkJCSob9++kqThw4dr9+7dWrx4sV555ZUzxprYv3DqO120966qqkoNDQ0hV2ibmpq0detWvfjii/L5fIqLiwt5jGk9bE2Np4v2Pp6qa9euuuqqq8651rbqH++BuUgul0ubNm0K2eZ2u8/7erbpqqur1bNnz7ZexllZlqUpU6Zo9erV2rx5szIyMi74GNN62JoaT9fU1KSampqo7ePpmpub5fP5zrrPtP6dzfnqO1209+7WW29VTU2Nqqurg7cRI0ZowoQJqq6uPusvdtN62JoaTxftfTzV0aNH9dlnn51zrW3WP1vfImygb7/91tq7d6+1d+9eS5L13HPPWXv37rW++OILy7Is68knn7QmTpwYHP/5559bSUlJ1rRp06yPP/7YKikpseLi4qwNGza0VQnnFW59ixYtstasWWN98sknVk1NjfX4449bsbGx1saNG9uqhPOaPHmylZycbG3ZssU6dOhQ8Hb8+PHgmIkTJ1pPPvlk8P77779vdejQwXr22Wetjz/+2Hrqqaes+Ph4q6ampi1KuKDW1Dh79myrrKzM+uyzz6yqqirr3nvvtRITE63a2tq2KOG8nnzySauiosI6cOCA9eGHH1pPPvmkFRMTY5WXl1uWZX7/wq3PpN6dy+mf0DG9h2dzoRpN6uM///M/W1u2bLEOHDhgvf/++1Z2drZ1+eWXWw0NDZZlRU//CDCnCXxs+PRbfn6+ZVmWlZ+fb91yyy1nPGbIkCFWQkKC9cMf/tBavnz5977ulgq3vmeeecb60Y9+ZCUmJlrdunWzRo0aZW3evLltFt8CZ6tNUkhPbrnllmC9AStXrrSuuuoqKyEhwbr66qutdevWfb8LD0NraiwsLLR69+5tJSQkWCkpKdZtt91mffDBB9//4lvgwQcftPr06WMlJCRYV1xxhXXrrbcGf7lblvn9C7c+k3p3Lqf/cje9h2dzoRpN6uM999xj9ezZ00pISLB+8IMfWPfcc4/16aefBvdHS/9iLMuy7L3GAwAAEFm8BwYAABiHAAMAAIxDgAEAAMYhwAAAAOMQYAAAgHEIMAAAwDgEGAAAYBwCDAAAMA4BBgAAGIcAAwAAjEOAAQAAxiHAAAAA4/w/LZVWvMdGyxMAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "housing[\"income_cat\"].hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    7236\n",
       "2    6581\n",
       "4    3639\n",
       "5    2362\n",
       "1     822\n",
       "Name: income_cat, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing['income_cat'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing[\"rooms_per_household\"] = housing[\"total_rooms\"]/housing[\"households\"]\n",
    "\n",
    "housing[\"bedrooms_per_room\"] = housing[\"total_bedrooms\"]/housing[\"total_rooms\"]\n",
    "\n",
    "housing[\"population_per_household\"]=housing[\"population\"]/housing[\"households\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "median_house_value          1.000000\n",
       "median_income               0.688075\n",
       "rooms_per_household         0.151948\n",
       "total_rooms                 0.134153\n",
       "housing_median_age          0.105623\n",
       "households                  0.065843\n",
       "total_bedrooms              0.049686\n",
       "population_per_household   -0.023737\n",
       "population                 -0.024650\n",
       "longitude                  -0.045967\n",
       "latitude                   -0.144160\n",
       "bedrooms_per_room          -0.255880\n",
       "Name: median_house_value, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "corr_matrix = housing.corr()\n",
    "corr_matrix[\"median_house_value\"].sort_values(ascending=False)\n",
    "# 可以看出median_income和目标y median_house_value最相关"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 训练前数据清理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### SimpleImputer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.impute import SimpleImputer\n",
    "imputer = SimpleImputer(strategy=\"median\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SimpleImputer(strategy='median')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_num = housing.drop(\"ocean_proximity\", axis=1) # 没有文本属性ocean_proximity的数据副本\n",
    "imputer.fit(housing_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-1.18490000e+02  3.42600000e+01  2.90000000e+01  2.12700000e+03\n",
      "  4.35000000e+02  1.16600000e+03  4.09000000e+02  3.53480000e+00\n",
      "  1.79700000e+05  3.00000000e+00  5.22912879e+00  2.03162434e-01\n",
      "  2.81811565e+00]\n",
      "[-1.18490000e+02  3.42600000e+01  2.90000000e+01  2.12700000e+03\n",
      "  4.35000000e+02  1.16600000e+03  4.09000000e+02  3.53480000e+00\n",
      "  1.79700000e+05  5.22912879e+00  2.03162434e-01  2.81811565e+00]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/tzp/opt/miniconda3/envs/mlflow-dev-env/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.\n",
      "  \n"
     ]
    }
   ],
   "source": [
    "print(imputer.statistics_)\n",
    "print(housing_num.median().values)\n",
    "X = imputer.transform(housing_num)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### OrdinalEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[array(['<1H OCEAN', 'INLAND', 'ISLAND', 'NEAR BAY', 'NEAR OCEAN'],\n",
      "      dtype=object)]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[3.],\n",
       "       [3.],\n",
       "       [3.],\n",
       "       ...,\n",
       "       [1.],\n",
       "       [1.],\n",
       "       [1.]])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_cat = housing[[\"ocean_proximity\"]] #文本/cata类型特征\n",
    "from sklearn.preprocessing import OrdinalEncoder\n",
    "ordinal_encoder = OrdinalEncoder()\n",
    "housing_cat_encoded = ordinal_encoder.fit_transform(housing_cat)\n",
    "print(ordinal_encoder.categories_)\n",
    "housing_cat_encoded"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### OneHotEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., 1., 0.],\n",
       "       [0., 0., 0., 1., 0.],\n",
       "       [0., 0., 0., 1., 0.],\n",
       "       ...,\n",
       "       [0., 1., 0., 0., 0.],\n",
       "       [0., 1., 0., 0., 0.],\n",
       "       [0., 1., 0., 0., 0.]])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder\n",
    "cat_encoder = OneHotEncoder()\n",
    "housing_cat_1hot = cat_encoder.fit_transform(housing_cat)\n",
    "housing_cat_1hot.toarray()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "num_pipeline = Pipeline([\n",
    "        ('imputer', SimpleImputer(strategy=\"median\")),\n",
    "        ('attribs_adder', CombinedAttributesAdder()),\n",
    "        ('std_scaler', StandardScaler()),\n",
    "    ])\n",
    "\n",
    "housing_num_tr = num_pipeline.fit_transform(housing_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.compose import ColumnTransformer\n",
    "\n",
    "num_attribs = list(housing_num)\n",
    "cat_attribs = [\"ocean_proximity\"]\n",
    "\n",
    "full_pipeline = ColumnTransformer([\n",
    "        (\"num\", num_pipeline, num_attribs),\n",
    "        (\"cat\", OneHotEncoder(), cat_attribs),\n",
    "    ])\n",
    "\n",
    "housing_prepared = full_pipeline.fit_transform(housing)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 再split\n",
    "from sklearn.model_selection import StratifiedShuffleSplit\n",
    "\n",
    "split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n",
    "for train_index, test_index in split.split(housing, housing[\"income_cat\"]):\n",
    "    strat_train_set = housing.loc[train_index]\n",
    "    strat_test_set = housing.loc[test_index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12655     72100.0\n",
       "15502    279600.0\n",
       "2908      82700.0\n",
       "14053    112500.0\n",
       "20496    238300.0\n",
       "           ...   \n",
       "15174    268500.0\n",
       "12661     90400.0\n",
       "19263    140400.0\n",
       "19140    258100.0\n",
       "19773     62700.0\n",
       "Name: median_house_value, Length: 16512, dtype: float64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_labels = strat_train_set[\"median_house_value\"].copy()\n",
    "housing_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeRegressor\n",
    "\n",
    "tree_reg = DecisionTreeRegressor() \n",
    "tree_reg.fit(housing_prepared, housing_labels)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mlflow-dev-env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
